RLOOP_PEAKS <- list("RL by counts" = "analyses/RLoop_Table_Analysis/data/rloops/strategy.a__10bp__peaks.narrowPeak",
"RL by mean signal" = "analyses/RLoop_Table_Analysis/data/rloops/strategy.b__10bp__peaks.narrowPeak",
"RL by mean qval" = "analyses/RLoop_Table_Analysis/data/rloops/strategy.c__10bp__peaks.narrowPeak")
MAX_RL_SIZE <- 5000
library(magrittr)
library(tidyverse)
library(ChIPpeakAnno)
library(enrichR)
library(EnsDb.Hsapiens.v86)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
library(ChIPseeker)
# Annotations
annoData <- toGRanges(EnsDb.Hsapiens.v86, feature = "gene")
# TSS region
tss <- getPromoters(TxDb=TxDb.Hsapiens.UCSC.hg38.knownGene,
upstream=3000,
downstream=3000)
# Annotation function
annoRL <- function(gr, max_rl_size) {
gr <- gr[width(gr) < max_rl_size,]
gr <- keepStandardChromosomes(gr, pruning.mode = "coarse")
anno <- annotatePeakInBatch(myPeakList = gr, AnnotationData = annoData,
output = "overlapping")
anno <- anno[! is.na(anno$feature),]
annodf <- as.data.frame(anno)
mapping <- AnnotationDbi::select(EnsDb.Hsapiens.v86,
keys = keys(EnsDb.Hsapiens.v86),
columns = "SYMBOL")
annodf <- left_join(annodf, mapping , by = c('feature' = "GENEID"))
anno$SYMBOL <- annodf$SYMBOL
return(anno)
}
# Read RLoops
readRL <- function(x, max_rl_size) {
readr::read_tsv(x, comment = "#", col_names = c(
"seqnames", "start", "end", "name", "score", "strand", "signalValue", "pval", "qval", "peak"
)) %>%
dplyr::filter(end - start < max_rl_size) %>%
as.data.frame() %>%
toGRanges()
}We evaluated the output from the various strategies used to generate a set of standardized Rloops.
rloops <- lapply(RLOOP_PEAKS, readRL, max_rl_size=MAX_RL_SIZE)
tagMatrixList <- lapply(
rloops, function(peaks) {
getTagMatrix(peaks, weightCol = "score", windows = tss)
}
)
peakAnnoList <- lapply(
rloops, function(peaks) {
annotatePeak(peaks, verbose = FALSE, TxDb = TxDb.Hsapiens.UCSC.hg38.knownGene)
}
)
geneLst <- lapply(rloops, function(gr) {
anno <- annoRL(gr, max_rl_size = MAX_RL_SIZE) %>% as.data.frame()
return(anno)
})plotAvgProf(tagMatrixList, xlim=c(-3000, 3000))plotDistToTSS(peakAnnoList)plotAnnoBar(peakAnnoList)enrichLinks <- plyr::llply(names(geneLst), function(groupNow) {
genePeaks <- geneLst[[groupNow]]
genesNow <- genePeaks %>%
dplyr::filter(! is.na(SYMBOL)) %>%
pull(SYMBOL) %>%
unique()
if (length(genesNow) > 5000) {
genesNow <- sample(genesNow, 5000)
}
response <- httr::POST(url = 'https://maayanlab.cloud/Enrichr/addList', body = list(
'list' = paste0(genesNow, collapse = "\n"),
'description' = groupNow
))
jsonlite::fromJSON(httr::content(response, as = "text"))
})
names(enrichLinks) <- names(geneLst)We then performed pathway enrichment on the genes overlapping with DRLA peaks and found that the pathways with increasing R-loops in E7107 treatment were largely related to spliceosome.
res <- lapply(names(geneLst), function(groupNow) {
genesNow <- geneLst[[groupNow]]
if (length(genesNow) > 5000) {
genesNow <- sample(genesNow, 5000)
}
permalinkNow <- enrichLinks[[groupNow]]$shortId
knitr::knit_child(
'pathEnrich_template.Rmd', envir = environment(), quiet = TRUE
)
})
cat(unlist(res), sep = '\n')permalink <- paste0("https://maayanlab.cloud/Enrichr/enrich?dataset=", permalinkNow[1])Pathway enrichment was calculated for RL by counts using enrichr. The permalink to this analysis is available here.
Here are some selected pathways which represents these results:
dbsNow <- c("KEGG_2021_Human", "GO_Biological_Process_2021",
"ChEA_2016", "BioPlanet_2019")
maxval <- 2000
log <- capture.output(enres <- enrichr(genesNow, databases = dbsNow))
print(enres)## $KEGG_2021_Human
## X.
## 1 "expired": true
## 2 }
##
## $GO_Biological_Process_2021
## X.
## 1 "expired": true
## 2 }
##
## $ChEA_2016
## X.
## 1 "expired": true
## 2 }
##
## $BioPlanet_2019
## X.
## 1 "expired": true
## 2 }
# plotLst <- lapply(names(enres), function(enrNow) {
# enrNowData <- enres[[enrNow]]
# enrNowData %>%
# slice_max(order_by = Combined.Score, n = 10) %>%
# # dplyr::mutate(Combined.Score = case_when(Combined.Score > maxval ~ maxval,
# # TRUE ~ Combined.Score)) %>%
# dplyr::mutate(logP = -log10(Adjusted.P.value)) %>%
# dplyr::mutate(Term = substr(Term, 1, 50)) %>%
# dplyr::mutate(Term = stringr::str_wrap(Term, width = 40)) %>%
# arrange(Combined.Score) %>%
# dplyr::mutate(Term = factor(Term, levels = unique(Term))) %>%
# ggplot(aes(x = Term, y = Combined.Score, fill = logP)) +
# geom_col() +
# ggpubr::rotate() +
# xlab(NULL) +
# ylab(paste0("Combined Score (max ", maxval, ")")) +
# scale_y_continuous(expand = c(0, 1)) +
# labs(title = enrNow, subtitle = paste0("DRLA: ", groupNow)) +
# theme_classic(base_size = 11)
# })
# ggpubr::ggarrange(plotlist = plotLst, align = "v")Here are the genes which were found in the overlap group:
tibble(
"Genes" = genesNow
) %>%
DT::datatable(extensions = 'Buttons',
options = list(dom = 'Bfrtip',
buttons = list(list(extend='copy'),
list(extend='csv', filename = groupNow),
list(extend = 'excel', filename = groupNow))))